library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)

source("C:/Users/jtame/Documents/GitHub/RISKPLOTS/CODE/auxplots.R")

RRPLOTS and flchain

odata <- flchain
odata$chapter <- NULL
pander::pander(table(odata$death))
0 1
5705 2169
rownames(odata) <- c(1:nrow(odata))
data <- as.data.frame(model.matrix(Surv(futime,death)~.,odata))

data$`(Intercept)` <- NULL

dataFL <- as.data.frame(cbind(time=odata[rownames(data),"futime"],
                              status=odata[rownames(data),"death"],
                              data))
pander::pander(table(dataFL$status))
0 1
4562 1962
dataFL$time <- dataFL$time/365

Exploring Raw Features with RRPlot

convar <- colnames(dataFL)[lapply(apply(dataFL,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataFL[,c("status",convar)],"status")
pander::pander(topvar)
age kappa lambda creatinine
0 0 0 0
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]

topFeature <- RRPlot(cbind(dataFL$status,dataFL[,topFive[1]]),
                  title=topFive[1])

  par(op)

## With Survival Analysis
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(dataFL$status,dataFL[,topf]),
                  timetoEvent=dataFL$time,
                  atRate=c(0.90,0.80),
                  title=topf)
  idx <- idx + 1
  par(op)
}

names(RRanalysis) <- topFive

Reporting the Metrics

pander::pander(t(RRanalysis[[1]]$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100
Thr 73.000 69.000 68.000 53.000 50.00000
RR 4.013 4.399 4.537 5.592 1.00000
RR_LCI 3.740 4.045 4.160 4.406 0.00000
RR_UCI 4.305 4.783 4.947 7.096 0.00000
SEN 0.581 0.713 0.733 0.967 1.00000
SPE 0.883 0.790 0.776 0.216 0.00329
BACC 0.732 0.752 0.755 0.591 0.50164
ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive

pander::pander(ROCAUC)
  est lower upper
age 0.822 0.810 0.833
kappa 0.682 0.667 0.696
lambda 0.665 0.650 0.680
creatinine 0.589 0.574 0.605
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.775 0.774 0.764 0.785
kappa 0.671 0.671 0.659 0.684
lambda 0.657 0.657 0.644 0.671
creatinine 0.585 0.585 0.572 0.599
pander::pander(LogRangp)
age 0.00e+00
kappa 4.90e-175
lambda 4.41e-145
creatinine 2.67e-67
pander::pander(Sensitivity)
  est lower upper
age 0.581 0.558 0.602
kappa 0.319 0.298 0.340
lambda 0.300 0.279 0.321
creatinine 0.288 0.269 0.309
pander::pander(Specificity)
  est lower upper
age 0.883 0.873 0.892
kappa 0.900 0.891 0.908
lambda 0.899 0.890 0.907
creatinine 0.865 0.854 0.875
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe
age 0.822 0.775 0.581 0.883
kappa 0.682 0.671 0.319 0.900
lambda 0.665 0.657 0.300 0.899
creatinine 0.589 0.585 0.288 0.865

Train Test Set

trainsamples <- sample(nrow(dataFL),0.5*nrow(dataFL))
dataFLTrain <- dataFL[trainsamples,]
dataFLTest <- dataFL[-trainsamples,]


pander::pander(table(dataFLTrain$status))
0 1
2291 971
pander::pander(table(dataFLTest$status))
0 1
2271 991

Cox Modeling

ml <- BSWiMS.model(Surv(time,status)~.,data=dataFLTrain,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower mean upper u.Accuracy r.Accuracy
age 0.0985 0.0727 0.0797 0.086 0.718 0.608
flc.grp 0.1042 0.0635 0.0908 0.115 0.605 0.714
creatinine 0.4552 0.3074 0.4103 0.590 0.548 0.719
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
age 0.725 0.741 0.630 0.75 0.19084 0.86869
flc.grp 0.725 0.631 0.740 0.75 0.01121 0.31941
creatinine 0.725 0.549 0.745 0.75 0.00344 0.00935
  z.IDI z.NRI Delta.AUC Frequency
age 27.02 26.162 -0.12016 1
flc.grp 5.51 8.543 -0.01039 1
creatinine 3.10 0.245 -0.00525 1

Cox Model Performance

The evaluation of the raw Cox model with RRPlot()

timeinterval <- 5

h0 <- sum(dataFLTrain$status & dataFLTrain$time <= timeinterval)
h0 <- h0/sum((dataFLTrain$time > timeinterval) | (dataFLTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.133 5
index <- predict(ml,dataFLTrain)
rdata <- cbind(dataFLTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories

obsexp <- rrAnalysisTrain$OERatio$atThrEstimates

expObs(obsexp,"Train: Expected vs. Observed")

pander::pander(obsexp)
  Observed L.CI H.CI Expected O/E Low Upper pvalue
Total 971 911 1034 1353 0.718 0.673 0.764 9.62e-28
low 270 239 304 352 0.767 0.678 0.864 6.53e-06
90% 139 117 164 186 0.749 0.630 0.885 4.20e-04
80% 562 516 610 813 0.691 0.635 0.751 1.32e-20

Time to Event Analysis

rrAnalysisdata <- rrAnalysisTrain

pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction: rrAnalysisdata$timetoEventData$eTime and rrAnalysisdata$timetoEventData$expectedTime
Test statistic P value Alternative hypothesis
668611 2.93e-300 * * * two.sided
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction: rrAnalysisdata$timetoEventData$eTime[highrisk] and rrAnalysisdata$timetoEventData$expectedTime[highrisk]
Test statistic P value Alternative hypothesis
271344 1.66e-70 * * * two.sided
timesdata <- expObsTime(rrAnalysisdata,title="Train: Expected vs Observed")

pander::pander(timesdata)
Table continues below
  O:Low Risk < 0.181 O:0.181 <= Risk < 0.280
1Q 10.4 7.85
Median 12.4 11.85
3Q 13.2 13.11
Table continues below
  O:High Risk >= 0.280 E:Low Risk < 0.181
1Q 3.14 23.2
Median 7.29 40.4
3Q 11.34 67.8
  E:0.181 <= Risk < 0.280 E:High Risk >= 0.280
1Q 8.94 2.36
Median 10.12 3.93
3Q 11.37 5.69

Test results

index <- predict(ml,dataFLTest)
rtestdata <- cbind(dataFLTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rtestdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTest$time,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories

obsexp <- rrAnalysisTest$OERatio$atThrEstimates

expObs(obsexp,"Test: Expected vs. Observed")

pander::pander(obsexp)
  Observed L.CI H.CI Expected O/E Low Upper pvalue
Total 988 927 1052 1333 0.741 0.696 0.789 4.85e-23
low 260 229 294 322 0.808 0.713 0.913 4.40e-04
90% 157 133 184 167 0.943 0.801 1.102 4.86e-01
80% 574 528 623 845 0.680 0.625 0.738 6.35e-23

Time to Event Analysis

rrAnalysisdata <- rrAnalysisTest

pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction: rrAnalysisdata$timetoEventData$eTime and rrAnalysisdata$timetoEventData$expectedTime
Test statistic P value Alternative hypothesis
566654 0 * * * two.sided
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction: rrAnalysisdata$timetoEventData$eTime[highrisk] and rrAnalysisdata$timetoEventData$expectedTime[highrisk]
Test statistic P value Alternative hypothesis
265800 8.99e-57 * * * two.sided
timesdata <- expObsTime(rrAnalysisdata,title="Test: Expected vs Observed")

pander::pander(timesdata)
Table continues below
  O:Low Risk < 0.164 O:0.164 <= Risk < 0.259
1Q 10.5 6.99
Median 12.4 11.30
3Q 13.2 12.91
Table continues below
  O:High Risk >= 0.259 E:Low Risk < 0.164
1Q 2.93 26.5
Median 6.96 44.0
3Q 11.02 70.7
  E:0.164 <= Risk < 0.259 E:High Risk >= 0.259
1Q 9.81 2.35
Median 11.31 4.05
3Q 12.65 6.11

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataFLTrain,"status","time")

( 15.13201 , 17550.98 , 1.272783 , 971 , 1068.674 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.251 0.691 15.1

The RRplot() of the calibrated model

index <- predict(ml,dataFLTrain)

calrdata <- cbind(dataFLTrain$status,ppoisGzero(index,calprob$h0))


rrAnalysisCalTrain <- RRPlot(calrdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories

obsexp <- rrAnalysisCalTrain$OERatio$atThrEstimates

expObs(obsexp,"Cal: Expected vs. Observed")

pander::pander(obsexp)
  Observed L.CI H.CI Expected O/E Low Upper pvalue
Total 971 911 1034 842 1.15 1.08 1.23 1.29e-05
low 270 239 304 220 1.23 1.09 1.38 9.40e-04
90% 139 117 164 116 1.20 1.01 1.42 3.63e-02
80% 562 516 610 505 1.11 1.02 1.21 1.27e-02

Time to Event Analysis

rrAnalysisdata <- rrAnalysisCalTrain

pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction: rrAnalysisdata$timetoEventData$eTime and rrAnalysisdata$timetoEventData$expectedTime
Test statistic P value Alternative hypothesis
248652 0 * * * two.sided
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction: rrAnalysisdata$timetoEventData$eTime[highrisk] and rrAnalysisdata$timetoEventData$expectedTime[highrisk]
Test statistic P value Alternative hypothesis
193150 2.01e-08 * * * two.sided
timesdata <- expObsTime(rrAnalysisdata,title="Cal: Expected vs Observed")

pander::pander(timesdata)
Table continues below
  O:Low Risk < 0.313 O:0.313 <= Risk < 0.463
1Q 10.4 7.85
Median 12.4 11.85
3Q 13.2 13.11
Table continues below
  O:High Risk >= 0.463 E:Low Risk < 0.313
1Q 3.14 37.2
Median 7.29 64.7
3Q 11.34 108.6
  E:0.313 <= Risk < 0.463 E:High Risk >= 0.463
1Q 14.3 3.78
Median 16.2 6.30
3Q 18.2 9.13

Checking the test set

index <- predict(ml,dataFLTest)
rtestdata <- cbind(dataFLTest$status,ppoisGzero(index,calprob$h0))

rrAnalysisCalTest <- RRPlot(rtestdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTest$time,
                     title="Cal. Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories

obsexp <- rrAnalysisCalTest$OERatio$atThrEstimates

expObs(obsexp,"Cal Test: Expected vs. Observed")

pander::pander(obsexp)
  Observed L.CI H.CI Expected O/E Low Upper pvalue
Total 988 927 1052 818 1.21 1.13 1.29 8.66e-09
low 260 229 294 201 1.29 1.14 1.46 5.66e-05
90% 157 133 184 104 1.51 1.28 1.77 1.11e-06
80% 574 528 623 513 1.12 1.03 1.21 8.61e-03

Time to Event Analysis

rrAnalysisdata <- rrAnalysisCalTest

pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime,rrAnalysisdata$timetoEventData$expectedTime,paired = TRUE))
Wilcoxon signed rank test with continuity correction: rrAnalysisdata$timetoEventData$eTime and rrAnalysisdata$timetoEventData$expectedTime
Test statistic P value Alternative hypothesis
202579 0 * * * two.sided
highrisk <- rrAnalysisdata$timetoEventData$class == 2
pander::pander(wilcox.test(rrAnalysisdata$timetoEventData$eTime[highrisk],rrAnalysisdata$timetoEventData$expectedTime[highrisk],paired = TRUE))
Wilcoxon signed rank test with continuity correction: rrAnalysisdata$timetoEventData$eTime[highrisk] and rrAnalysisdata$timetoEventData$expectedTime[highrisk]
Test statistic P value Alternative hypothesis
173740 0.0606 two.sided
timesdata <- expObsTime(rrAnalysisdata,title="Cal Test: Expected vs Observed")

pander::pander(timesdata)
Table continues below
  O:Low Risk < 0.287 O:0.287 <= Risk < 0.432
1Q 10.5 6.99
Median 12.4 11.30
3Q 13.2 12.91
Table continues below
  O:High Risk >= 0.432 E:Low Risk < 0.287
1Q 2.93 42.4
Median 6.96 70.5
3Q 11.02 113.2
  E:0.287 <= Risk < 0.432 E:High Risk >= 0.432
1Q 15.7 3.76
Median 18.1 6.49
3Q 20.3 9.79